#!/usr/bin/env python
# encoding: utf-8
"""
#-------------------------------------------------------------------#
#                   CONFIDENTIAL --- CUSTOM STUDIOS                 #     
#-------------------------------------------------------------------#
#                                                                   #
#                   @Project Name : Globallawonline                #
#                                                                   #
#                   @File Name    : dealcnkitreaty.py                      #
#                                                                   #
#                   @Programmer   : 李建                            #
#                                                                   #  
#                   @Start Date   : 2021/8/12 11:21                 #
#                                                                   #
#                   @Last Update  : 2021/8/12 11:21                 #
#                                                                   #
#-------------------------------------------------------------------#
# Classes:处理知网的国际条约数据为待导入状态                                                          #
#                                                                   #
#-------------------------------------------------------------------#
"""
import os
import shutil

from dealpdf import PdfFunction
import hashlib

# Country name (as it may appear inside a treaty's state-party text) mapped to
# the SortA country code. The sheet readers match keys by substring containment.
country_dict = {
    "越南": "LAWCOUNTRYYN",
    "老挝": "LAWCOUNTRYLW",
    "印度尼西亚": "LAWCOUNTRYYDNXY",
    "新加坡": "LAWCOUNTRYXJP",
    "文莱": "LAWCOUNTRYWL",
    "泰": "LAWCOUNTRYTG",
    "缅甸": "LAWCOUNTRYMD",
    "柬埔寨": "LAWCOUNTRYJPZ",
    "马来西亚": "LAWCOUNTRYMLXY",
    "菲律宾": "LAWCOUNTRYFLB",
    "东南亚国家": "LAWCOUNTRYDM",
    "苏里南": "LAWCOUNTRYSLN",
}

# Treaty type as written in the workbook mapped to the Website value, which is
# also used as the output sub-directory name.
website_dict = {
    "双边条约": "Bilateral",
    "多边条约": "Regional",
    "国际公约": "International",
}

# Output roots for the copied PDFs (Chinese and English full texts).
# NOTE(review): the CH root has no "Treaty" segment while the EN root does.
# FileUrl is built from the last five path components, so CH URLs start with
# "/PDF/导入/CNKI/CH/..." and EN URLs with "/PDF/Treaty/CNKI/EN/..." - confirm
# that this difference is intended.
PDF_CHOUTDIR = r"E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\国际分社国际条约\PDF文件\导入\CNKI\CH"
PDF_ENOUTDIR = r"E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\国际分社国际条约\PDF文件\导入\Treaty\CNKI\EN"

# SYSIDs already processed; shared by every sheet reader so that a record seen
# in one sheet is skipped when it shows up again in another.
SYSID_list = []

def read_excel_ybc(excel_path, sheet_name, pdf_dir):
    """
    Prepare the "已补充" (supplemented) sheet of the CNKI treaty workbook
    (东盟十国法规清单20210719) for import.

    For each row that names a PDF file, this derives the country codes
    (SortA), an MD5-based SYSID and the stored file name, copies the PDF to
    ``PDF_CHOUTDIR/<Website>`` and writes the derived values into six appended
    columns. Rows whose SYSID is already in the module-level ``SYSID_list``
    are skipped, as are rows whose treaty type is not in ``website_dict``.
    The enriched sheet is saved once, after all rows have been processed.

    :param excel_path: full path of the Excel workbook
    :param sheet_name: name of the sheet to read
    :param pdf_dir: directory containing the source PDF files
    :return: None
    """
    import pandas as pd

    data = pd.read_excel(excel_path, sheet_name)

    # Appended result columns; '哈希值计算' stores the value that was hashed.
    new_columns = ('SortA', '哈希值计算', 'SYSID', 'SYS_FLD_DIGITFILENAME',
                   'FileUrl', 'Website')
    for column in new_columns:
        data[column] = ""
    # Resolve the positions by name so the writes below can never land on a
    # source column when the sheet has more columns than expected.
    col_pos = {column: data.columns.get_loc(column) for column in new_columns}

    for i in range(len(data)):
        filename = str(data.iloc[i, 10])
        if filename == 'nan':  # empty cell: this row has no PDF
            continue

        # Country codes of every known country mentioned in the state-party text.
        state_party = str(data.iloc[i, 6])
        sort_a = ",".join(code for country, code in country_dict.items()
                          if country in state_party)

        cnki_id = str(data.iloc[i, 0])
        sys_id = hashlib.md5(cnki_id.encode('utf-8')).hexdigest()
        if sys_id in SYSID_list:  # already handled: do not import it twice
            continue

        # Previously an unknown treaty type either crashed on the first row or
        # silently reused the previous row's Website; skip such rows instead.
        treaty_type = str(data.iloc[i, 7])
        website = website_dict.get(treaty_type)
        if website is None:
            print("未知的条约类型:%s(行索引 %d),已跳过" % (treaty_type, i))
            continue
        SYSID_list.append(sys_id)

        stored_name = 'f' + sys_id + '.pdf'
        file_path = os.path.join(pdf_dir, filename)
        out_dir = os.path.join(PDF_CHOUTDIR, website)
        out_path = os.path.join(out_dir, stored_name)
        # URL = '/PDF/' + the last five components of the Windows output path.
        file_url = '/PDF/' + '/'.join(out_path.split('\\')[-5:])

        os.makedirs(out_dir, exist_ok=True)
        try:
            shutil.copyfile(file_path, out_path)
        except Exception as e:
            # Best effort: log the failed source path and keep processing.
            with open("copyerro0.txt", "a", encoding='utf-8') as f:
                f.write(file_path + '\n')
            print("文件复制出错:%s(%s)" % (file_path, str(e)))

        data.iloc[i, col_pos['SortA']] = sort_a
        data.iloc[i, col_pos['哈希值计算']] = cnki_id
        data.iloc[i, col_pos['SYSID']] = sys_id
        data.iloc[i, col_pos['SYS_FLD_DIGITFILENAME']] = stored_name
        data.iloc[i, col_pos['FileUrl']] = file_url
        data.iloc[i, col_pos['Website']] = website

    # Save once after the loop instead of rewriting the workbook for every row.
    data.to_excel(r'E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\国际分社国际条约\东盟十国法规清单20210719_已补充.xls')



def read_excel_klyy(excel_path, sheet_name, pdf_dir):
    """
    Prepare the "库里已有" (already in the database) sheet of the CNKI treaty
    workbook (东盟十国法规清单20210719) for import.

    For each row that names a PDF file, this derives the country codes
    (SortA), an MD5-based SYSID and the stored file name, copies the PDF to
    ``PDF_CHOUTDIR/<Website>`` and writes the derived values into six appended
    columns. Rows whose SYSID is already in the module-level ``SYSID_list``
    are skipped, as are rows whose treaty type is not in ``website_dict``.
    The enriched sheet is saved once, after all rows have been processed.

    :param excel_path: full path of the Excel workbook
    :param sheet_name: name of the sheet to read
    :param pdf_dir: directory containing the source PDF files
    :return: None
    """
    import pandas as pd

    data = pd.read_excel(excel_path, sheet_name)

    # Appended result columns; '哈希值计算' stores the value that was hashed.
    new_columns = ('SortA', '哈希值计算', 'SYSID', 'SYS_FLD_DIGITFILENAME',
                   'FileUrl', 'Website')
    for column in new_columns:
        data[column] = ""
    # Resolve the positions by name so the writes below can never land on a
    # source column when the sheet has more columns than expected.
    col_pos = {column: data.columns.get_loc(column) for column in new_columns}

    for i in range(len(data)):
        filename = str(data.iloc[i, 7])
        if filename == 'nan':  # empty cell: this row has no PDF
            continue

        # Country codes of every known country mentioned in the state-party text.
        state_party = str(data.iloc[i, 5])
        sort_a = ",".join(code for country, code in country_dict.items()
                          if country in state_party)

        cnki_id = str(data.iloc[i, 0])
        sys_id = hashlib.md5(cnki_id.encode('utf-8')).hexdigest()
        if sys_id in SYSID_list:  # already handled: do not import it twice
            continue

        # Previously an unknown treaty type either crashed on the first row or
        # silently reused the previous row's Website; skip such rows instead.
        treaty_type = str(data.iloc[i, 6])
        website = website_dict.get(treaty_type)
        if website is None:
            print("未知的条约类型:%s(行索引 %d),已跳过" % (treaty_type, i))
            continue
        SYSID_list.append(sys_id)

        stored_name = 'f' + sys_id + '.pdf'
        file_path = os.path.join(pdf_dir, filename)
        out_dir = os.path.join(PDF_CHOUTDIR, website)
        out_path = os.path.join(out_dir, stored_name)
        # URL = '/PDF/' + the last five components of the Windows output path.
        file_url = '/PDF/' + '/'.join(out_path.split('\\')[-5:])

        os.makedirs(out_dir, exist_ok=True)
        try:
            shutil.copyfile(file_path, out_path)
        except Exception as e:
            # Best effort: log the failed source path and keep processing.
            with open("copyerro1.txt", "a", encoding='utf-8') as f:
                f.write(file_path + '\n')
            print("文件复制出错:%s(%s)" % (file_path, str(e)))

        data.iloc[i, col_pos['SortA']] = sort_a
        data.iloc[i, col_pos['哈希值计算']] = cnki_id
        data.iloc[i, col_pos['SYSID']] = sys_id
        data.iloc[i, col_pos['SYS_FLD_DIGITFILENAME']] = stored_name
        data.iloc[i, col_pos['FileUrl']] = file_url
        data.iloc[i, col_pos['Website']] = website

    # Save once after the loop instead of rewriting the workbook for every row.
    data.to_excel(r'E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\国际分社国际条约\东盟十国法规清单20210719_库里已有.xls')


def read_excel_ywqw(excel_path, sheet_name, pdf_dir):
    """
    Prepare the "英文全文" (English full text) sheet of the CNKI treaty
    workbook (东盟十国法规清单20210719) for import.

    For each row that names a PDF file, this derives the country codes
    (SortA), an MD5-based SYSID and the stored file name, copies the PDF to
    ``PDF_ENOUTDIR/<Website>`` and writes the derived values into six appended
    columns. Rows whose SYSID is already in the module-level ``SYSID_list``
    are skipped, as are rows whose treaty type is not in ``website_dict``.
    The enriched sheet is saved once, after all rows have been processed.

    :param excel_path: full path of the Excel workbook
    :param sheet_name: name of the sheet to read
    :param pdf_dir: directory containing the source PDF files
    :return: None
    """
    import pandas as pd

    data = pd.read_excel(excel_path, sheet_name)

    # Appended result columns; '哈希值计算' stores the value that was hashed.
    new_columns = ('SortA', '哈希值计算', 'SYSID', 'SYS_FLD_DIGITFILENAME',
                   'FileUrl', 'Website')
    for column in new_columns:
        data[column] = ""
    # Resolve the positions by name so the writes below can never land on a
    # source column when the sheet has more columns than expected.
    col_pos = {column: data.columns.get_loc(column) for column in new_columns}

    for i in range(len(data)):
        filename = str(data.iloc[i, 6])
        if filename == 'nan':  # empty cell: this row has no PDF
            continue

        # Country codes of every known country mentioned in the state-party text.
        state_party = str(data.iloc[i, 4])
        sort_a = ",".join(code for country, code in country_dict.items()
                          if country in state_party)

        cnki_id = str(data.iloc[i, 0])
        sys_id = hashlib.md5(cnki_id.encode('utf-8')).hexdigest()
        if sys_id in SYSID_list:  # already handled: do not import it twice
            continue

        # Previously an unknown treaty type either crashed on the first row or
        # silently reused the previous row's Website; skip such rows instead.
        treaty_type = str(data.iloc[i, 5])
        website = website_dict.get(treaty_type)
        if website is None:
            print("未知的条约类型:%s(行索引 %d),已跳过" % (treaty_type, i))
            continue
        SYSID_list.append(sys_id)

        stored_name = 'f' + sys_id + '.pdf'
        file_path = os.path.join(pdf_dir, filename)
        out_dir = os.path.join(PDF_ENOUTDIR, website)
        out_path = os.path.join(out_dir, stored_name)
        # URL = '/PDF/' + the last five components of the Windows output path.
        file_url = '/PDF/' + '/'.join(out_path.split('\\')[-5:])

        os.makedirs(out_dir, exist_ok=True)
        try:
            shutil.copyfile(file_path, out_path)
        except Exception as e:
            # Best effort: log the failed source path and keep processing.
            with open("copyerro2.txt", "a", encoding='utf-8') as f:
                f.write(file_path + '\n')
            print("文件复制出错:%s(%s)" % (file_path, str(e)))

        data.iloc[i, col_pos['SortA']] = sort_a
        data.iloc[i, col_pos['哈希值计算']] = cnki_id
        data.iloc[i, col_pos['SYSID']] = sys_id
        data.iloc[i, col_pos['SYS_FLD_DIGITFILENAME']] = stored_name
        data.iloc[i, col_pos['FileUrl']] = file_url
        data.iloc[i, col_pos['Website']] = website

    # Save once after the loop instead of rewriting the workbook for every row.
    data.to_excel(r'E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\国际分社国际条约\东盟十国法规清单20210719_英文全文.xls')


# Driver: process each sheet of the workbook with its dedicated reader.
excel_path = r'E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\国际分社国际条约\东盟十国法规清单20210719.xls'
shhetname_list = ["已补充", "库里已有", "英文全文"]
pdf_dir = r"E:\工作记录\工作内容\“一带一路”项目\各国专家提供的法律资源资料\国际分社国际条约\PDF文件"

# Sheet name -> reader. Each sheet's PDFs live in a sub-directory of pdf_dir
# that carries the same name as the sheet.
sheet_readers = {
    "已补充": read_excel_ybc,
    "库里已有": read_excel_klyy,
    "英文全文": read_excel_ywqw,
}

for shhetname in shhetname_list:
    reader = sheet_readers.get(shhetname)
    if reader is None:
        continue
    reader(excel_path, shhetname, os.path.join(pdf_dir, shhetname))